# models_table_jop.R
rm(list = ls())
setwd("/Users/John/Dropbox/")

# --- Load required packages ---
library(texreg)
library(readr)
library(lfe)
library(pscl)
library(sandwich)
library(lmtest)
library(fixest)
library(tidyverse)

# --- Load data ---
df <- read_csv("JOP_Replication_Materials/data/processed/final_dataset.csv")
df_total <- read_csv("JOP_Replication_Materials/data/processed/total_isic.csv")

# --- Create GVC position variables ---
df2 <- df %>% filter(!is.na(median_share))
df2$downstream <- ifelse(df2$median_share <= 50, 1, 0)
df2$stratdown <- ifelse(df2$downstream == 1 & df2$strategic == 1, 1, 0)

# --- Models ---
ols1 <- felm(sqrtta ~ strategic*is_post | isic2 | 0 | isic + year, data = df)
ols2 <- felm(sqrtta ~ stratdown*is_post | isic2 | 0 | isic + year, data = df2)
ols3 <- felm(sqrtta ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 | year | 0 | isic + year, data = df)

psn1 <- fepois(isic_year ~ strategic*median_share | isic2 + year, data = df, cluster  = ~isic + year)
psn2 <- glm(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2, data = df, family = "poisson")
psn2.robust <- coeftest(psn2, vcov = vcovCL(psn2, cluster = ~isic + year))
psn3 <- fepois(isic_year ~ strategic*processing_share | isic2 + year, data = df, cluster = ~isic + year)

zip <- df %>% filter(!is.na(med_hhi_isic2), !is.na(med_soe_isic2), !is.na(median_share))
zip1 <- zeroinfl(isic_year ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 | strategic, data = zip)
zip1.robust <- coeftest(zip1, vcov = vcovCL(zip1, cluster = ~isic + year))

zip_total <- df_total %>% filter(!is.na(med_hhi_isic2), !is.na(med_soe_isic2), !is.na(median_share))
zip2 <- zeroinfl(total_isic ~ strategic*median_share + med_hhi_isic2 + med_soe_isic2 | strategic, data = zip_total)
zip2.robust <- coeftest(zip2, vcov = vcovCL(zip2, cluster = ~isic))

# --- Coefficient name mapping ---
coef_map <- list(
  "strategic" = "Strategic",
  "strategic:median_share" = "Strategic x Median Process. Share",
  "strategic:processing_share" = "Strategic x Processing Share",
  "is_post" = "Post-2006",
  "strategic:is_post" = "Strategic x Post-2006",
  "stratdown" = "Strategic + Downstream",
  "stratdown:is_post" = "Strat. + Down. x Post-2006",
  "zero_strategic" = "Strategic (Zero)",
  "count_strategic" = "Strategic (Count)",
  "count_strategic:median_share" = "Strat. x Med. Process Share (Count)",
  "median_share" = "Median Process. Share",
  "processing_share" = "Processing Share",
  "(Intercept)" = "Constant",
  "count_(Intercept)" = "Constant"
)

# --- Custom GOF rows ---
custom_gof <- list(
  "Num. Obs." = c(nobs(psn1), nobs(psn2), nobs(psn3), nobs(ols1), nobs(ols2), nobs(ols3), zip1$n, zip2$n),
  "Controls" = c("No", "Yes", "No", "No", "No", "Yes", "Yes", "Yes"),
  "Industry FE (2-Digit)" = c("Yes", "No", "Yes", "Yes", "Yes", "No", "No", "No"),
  "Year FE" = c("Yes", "No", "Yes", "No", "No", "Yes", "No", "No"),
  "R$^2$" = c(
    "", "", "", 
    round(summary(ols1)$r.squared, 3),
    round(summary(ols2)$r.squared, 3),
    round(summary(ols3)$r.squared, 3),
    "", ""
  ),
  "Adj. R$^2$" = c(
    "", "", "",
    round(summary(ols1)$adj.r.squared, 3),
    round(summary(ols2)$adj.r.squared, 3),
    round(summary(ols3)$adj.r.squared, 3),
    "", ""
  ),
  "Log Likelihood" = c(
    round(logLik(psn1), 3),
    round(logLik(psn2), 3),
    round(logLik(psn3), 3),
    round(logLik(ols1), 3),
    round(logLik(ols2), 3),
    round(logLik(ols3), 3),
    round(logLik(zip1), 3),
    round(logLik(zip2), 3)
  )
)

# --- Custom extractor for fixest::fepois (psn1) ---
extract.fixest <- function(model) {
  s <- summary(model)
  coefs <- s$coeftable
  
  gof <- numeric(0)
  gof_names <- character(0)
  gof_decimal <- logical(0)
  
  # Try to get logLik
  ll <- suppressWarnings(tryCatch(as.numeric(logLik(model)), error = function(e) NA_real_))
  if (!is.na(ll)) {
    gof <- c(gof, ll)
    gof_names <- c(gof_names, "Log Likelihood")
    gof_decimal <- c(gof_decimal, TRUE)
  }
  
  n <- tryCatch(nrow(model$model), error = function(e) NA_real_)
  if (length(n) == 1 && !is.na(n)) {
    gof <- c(gof, n)
    gof_names <- c(gof_names, "Num. Obs.")
    gof_decimal <- c(gof_decimal, FALSE)
  }
  
  createTexreg(
    coef.names = rownames(coefs),
    coef = coefs[, 1],
    se = coefs[, 2],
    pvalues = coefs[, 4],
    gof.names = gof_names,
    gof = gof,
    gof.decimal = gof_decimal
  )
}

# --- Custom extractor for glm/zeroinfl with cluster-robust SEs ---
extract.robust <- function(model, robust_se) {
  coefs <- coef(model)
  se <- robust_se[, 2]
  pval <- robust_se[, 4]
  names <- rownames(robust_se)
  
  gof <- numeric(0)
  gof_names <- character(0)
  gof_decimal <- logical(0)
  
  # Try logLik
  ll <- suppressWarnings(tryCatch(as.numeric(logLik(model)), error = function(e) NA_real_))
  if (!is.na(ll)) {
    gof <- c(gof, ll)
    gof_names <- c(gof_names, "Log Likelihood")
    gof_decimal <- c(gof_decimal, TRUE)
  }
  
  # Try Num. Obs.
  n <- suppressWarnings(tryCatch(nobs(model), error = function(e) NA_real_))
  if (length(n) == 1 && !is.na(n)) {
    gof <- c(gof, n)
    gof_names <- c(gof_names, "Num. Obs.")
    gof_decimal <- c(gof_decimal, FALSE)
  }
  
  createTexreg(
    coef.names = names,
    coef = coefs[names],
    se = se,
    pvalues = pval,
    gof.names = gof_names,
    gof = gof,
    gof.decimal = gof_decimal
  )
}

# --- Strip GOF from a model (removes duplicate rows like "Num. Obs." and "Log Likelihood") ---
strip_all_gof <- function(model) {
  model@gof.names <- character(0)
  model@gof <- numeric(0)
  model@gof.decimal <- logical(0)
  return(model)
}

# --- Model list ---
models <- list(
  strip_all_gof(extract.fixest(psn1)),
  strip_all_gof(extract.robust(psn2, psn2.robust)),
  strip_all_gof(extract.fixest(psn3)),
  strip_all_gof(texreg::extract(ols1)),
  strip_all_gof(texreg::extract(ols2)),
  strip_all_gof(texreg::extract(ols3)),
  strip_all_gof(extract.robust(zip1, zip1.robust)),
  strip_all_gof(extract.robust(zip2, zip2.robust))
)

# --- Create LaTeX file from texreg ---
tex_output <- "JOP_Replication_Materials/appendix/output/D_table_4_plain.tex"

texreg(
  models,
  file = tex_output,
  custom.coef.map = coef_map,
  stars = c(0.1, 0.05, 0.01),
  digits = 3,
  custom.gof.rows = custom_gof,
  caption = "Regression Results on Tech Absorption",
  label = "tab:regression_results"
)

# --- Wrap LaTeX for full PDF ---
table_tex_raw <- readLines(tex_output)
start_line <- grep("\\\\begin\\{tabular\\}", table_tex_raw)
end_line <- grep("\\\\end\\{tabular\\}", table_tex_raw)
table_tex <- table_tex_raw[start_line:end_line]

latex_wrapper <- c(
  "\\documentclass[11pt]{article}",
  "\\usepackage[margin=1in]{geometry}",
  "\\usepackage{booktabs}",
  "\\usepackage{caption}",
  "\\usepackage{graphicx}",
  "\\usepackage{float}",
  "\\usepackage{adjustbox}",
  "\\usepackage{amsmath}",
  "\\usepackage{longtable}",
  "\\usepackage{placeins}",
  "\\usepackage{mathptmx}",  
  "\\renewcommand{\\arraystretch}{0.6}",
  "\\begin{document}",
  "\\begin{table}[H]",
  "\\centering",
  "\\caption{Results from Regression with Poisson, OLS, and Zero-Inflated Poisson Models}",
  "\\vspace{0.5em}",
  "\\resizebox{\\textwidth}{!}{%",
  table_tex,
  "}",
  "\\end{table}",
  "\\FloatBarrier",
  "\\end{document}"
)

wrapped_file <- "JOP_Replication_Materials/appendix/output/D_table_4_wrapped.tex"
writeLines(latex_wrapper, wrapped_file)

# --- Compile PDF ---
tools::texi2pdf(wrapped_file, clean = TRUE)
file.rename("D_table_4_wrapped.pdf", "JOP_Replication_Materials/appendix/output/D_table_4_wrapped.pdf")
browseURL("JOP_Replication_Materials/appendix/output/D_table_4_wrapped.pdf")

